library(tidyverse)
library(plotrix) #std.error
library(hypr) #for generating contrast coding
library(ggh4x)
library(ggthemes)
library(DiagrammeR) #grViz for causal diagrams
library(plotly) #ggplotly for interactive plots
library(ggExtra) #ggMarginal
library(brms) #bayesian regression
library(bayestestR) #p_direction, hdi
library(tidybayes) #add_epred_draws
library(emmeans)
library(doParallel) #set the number of cores
### Set global options
options(digits = 3) # set the default number of digits to 3
# Use ~70% of the physical (non-logical) cores for parallel model fitting.
cores = as.integer(detectCores(logical = FALSE) * 0.7) # set the number of cores to use
registerDoParallel(cores=cores) # register the number of cores to use for parallel processing
options(mc.cores = cores) # let brms/Stan run MCMC chains in parallel
options(brms.backend = "cmdstanr") #this will speed up the model fitting
### MCMC options
niter = 20000 #number of iterations per chain (includes warmup)
nwu = 2000 #number of warmup iterations per chain
### Rmd settings
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE)
### Custom functions
########### pp_check_each_round ############
# Posterior predictive check (bar plot, 100 draws) restricted to the
# observations of a single round; the round label becomes the plot title.
pp_check_each_round <- function(m, data, round_info) {
  round_subset <- data %>% filter(round == round_info)
  pp_check(m,
           type = "bars",
           ndraws = 100,
           newdata = round_subset) +
    coord_cartesian(xlim = c(0, 10)) +
    ggtitle(round_info)
}
########### pp_check_each_condition ############
# Posterior predictive check (bar plot, 100 draws) restricted to the
# observations of a single condition; the condition label becomes the title.
pp_check_each_condition <- function(m, data, condition_info) {
  condition_subset <- data %>% filter(condition == condition_info)
  pp_check(m,
           type = "bars",
           ndraws = 100,
           newdata = condition_subset) +
    coord_cartesian(xlim = c(0, 10)) +
    ggtitle(condition_info)
}
########### plot_posterior ############
# Plot the posterior distributions of the fixed-effect coefficients as
# half-eye plots, faceted by model component (Visibility / Round / Interaction).
#
# model:  main brms fit (difference-coded condition).
# model2: optional second fit; its b_condition_sumAO_Sym draws are appended so
#         the AO--SymAV contrast appears in the same figure. Default NA.
# interaction:       if FALSE (default), interaction terms are dropped.
# include_intercept: if FALSE (default), the intercept row is dropped.
# xlim_cond, xlim_round: x-axis half-ranges for the condition and round facets.
plot_posterior <- function(model, model2=NA, interaction=FALSE, include_intercept=FALSE,
                           xlim_cond=0.3, xlim_round=0.06){
  ### extract the posterior draws
  posterior_beta1 <- model %>%
    gather_draws(`b_.*`, regex = TRUE) %>%
    mutate(intercept = str_detect(.variable, "Intercept"),
           component = ifelse(str_detect(.variable, ":"), "Interaction",
                       ifelse(str_detect(.variable, "round"), "Round",
                       ifelse(str_detect(.variable, "Intercept"), "Intercept",
                              "Visibility"))))
  # A brms fit has length > 1, so length(model2) == 1 means model2 is the
  # default NA (no second model supplied).
  if (length(model2) == 1){ #if model2 is NA
    posterior_beta = posterior_beta1
  } else {
    posterior_beta2 <- model2 %>%
      gather_draws(`b_.*`, regex = TRUE) %>%
      filter(.variable == "b_condition_sumAO_Sym") %>%
      # BUG FIX: also create the `intercept` column (FALSE here) so the
      # columns match posterior_beta1 and rbind() below succeeds.
      mutate(intercept = str_detect(.variable, "Intercept"),
             component = "Visibility")
    posterior_beta = rbind(posterior_beta1, posterior_beta2)
  }
  if (include_intercept == F){
    posterior_beta = posterior_beta %>% filter(component != "Intercept")
  }
  ### map coefficient names to readable labels and fix row/facet order
  posterior_beta = posterior_beta %>%
    mutate(.variable = recode(.variable,
                              "b_Intercept" = "Intercept",
                              "b_conditionAsymAV" = "SymAV--AsymAV",
                              "b_conditionAO" = "SymAV--AO",
                              "b_conditionAsym_Sym" = "AsymAV--SymAV",
                              "b_conditionAO_Asym" = "AO--AsymAV",
                              "b_condition_sumAO_Sym" = "AO--SymAV",
                              "b_round_c" = "Centered round",
                              "b_log_round_c" = "Centered log(round)",
                              "b_conditionAsym_Sym:round_c" = "Centered round: Asym--Sym",
                              "b_conditionAO_Sym:round_c" = "Centered round: AO--Sym",
                              "b_conditionAO_Asym:round_c" = "Centered round: AO--Asym",
                              "b_conditionAsym_Sym:log_round_c" = "Centered log(round): Asym--Sym",
                              "b_conditionAO_Sym:log_round_c" = "Centered log(round): AO--Sym",
                              "b_conditionAO_Asym:log_round_c" = "Centered log(round): AO--Asym"),
           # NOTE(review): these levels omit the interaction labels, so with
           # interaction = TRUE those rows become NA levels — confirm intended.
           .variable = factor(.variable,
                              levels = c("AO--SymAV", "AO--AsymAV", "AsymAV--SymAV",
                                         "Centered round")),
           component = factor(component,
                              levels = c("Intercept", "Visibility", "Round", "Interaction")))
  ### change variables if only main effects are plotted
  if (interaction == F) {
    posterior_beta = filter(posterior_beta, !str_detect(.variable, ":"))
    fill_manual_values = c("steelblue", "steelblue")
  } else{
    fill_manual_values = c("steelblue", "steelblue", "steelblue")
  }
  ### plot the posterior distributions
  p_posterior = ggplot(posterior_beta,
                       aes(x = .value, y = fct_rev(.variable),
                           fill = component)) +
    geom_vline(xintercept = 0, size = 1) +
    # NOTE(review): slab_alpha is both mapped (aes) and set to 0.5; the fixed
    # value takes precedence over the mapping — confirm intended.
    stat_halfeye(aes(slab_alpha = intercept),
                 normalize = "panels",
                 slab_alpha = 0.5,
                 .width = c(0.89, 0.95), # 89% and 95% HDIs
                 point_interval = "median_hdi") +
    scale_fill_manual(values = fill_manual_values) +
    scale_slab_alpha_discrete(range = c(0.8, 0.4)) +
    guides(fill = "none", slab_alpha = "none") +
    labs(x = "Coefficient", y = "Effect") +
    theme_clean(base_size = 15) +
    theme(axis.text.x = element_text(colour = "black", size = 14),
          axis.text.y = element_text(colour = "black", size = 14),
          axis.title = element_text(size = 15, face = 'bold'),
          axis.title.x = element_text(vjust = -2),
          axis.title.y = element_text(vjust = 2),
          legend.position = "none",
          strip.text = element_text(size = 15, face = 'bold'),
          strip.background = element_blank(),
          panel.grid.major.x = element_line(color = "grey90",
                                            linetype = "solid",
                                            size = 0.5),
          panel.grid.major.y = element_line(color = "grey90",
                                            linetype = "solid",
                                            size = 0.5),
          plot.background = element_blank(),
          plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines")) +
    facet_wrap(vars(component), ncol = 1, scales = "free") +
    # per-facet x scales: first facet (Visibility), second facet (Round)
    facetted_pos_scales(
      x = list(
        scale_x_continuous(limits = c(-xlim_cond, xlim_cond),
                           breaks = c(-0.3, 0, 0.3)),
        scale_x_continuous(limits = c(-xlim_round, xlim_round),
                           breaks = c(-0.05, 0, 0.05))))
  return(p_posterior)
}
########### pp_update_plot ############
# Prior-vs-posterior "update" plot: for each coefficient, overlay the prior
# draws (blue) and the posterior draws (orange) as densities, arranged in a
# two-column grid.
#
# post_sample:  draws data frame (as_draws_df) containing prior_* and b_*
#               columns (requires the model fitted with sample_prior = TRUE).
# interaction:  if TRUE (default), also plot the round effect and the
#               condition-by-round interaction terms.
pp_update_plot <- function(post_sample, interaction=TRUE){
  # helper: one panel overlaying a prior column and a posterior column
  density_panel <- function(prior_col, post_col, label) {
    ggplot(post_sample) +
      geom_density(aes(.data[[prior_col]]), fill="steelblue", color="black", alpha=0.6) +
      geom_density(aes(.data[[post_col]]), fill="#FC4E07", color="black", alpha=0.6) +
      xlab(label) +
      theme_classic()
  }
  # sum-coded models expose b_condition_sumAO_Sym instead of the
  # difference-coded b_conditionAsym_Sym / b_conditionAO_Asym columns.
  # (was: ifelse(... %in% ..., T, F) — %in% already returns a logical)
  sum_coded <- "b_condition_sumAO_Sym" %in% colnames(post_sample)
  intercept <- density_panel("prior_Intercept", "b_Intercept", "Intercept")
  ### Visibility condition
  if (!sum_coded) {
    cond1 <- density_panel("prior_b", "b_conditionAsym_Sym", "Asym--Sym")
    cond2 <- density_panel("prior_b", "b_conditionAO_Asym", "AO--Asym")
  } else {
    cond1 <- density_panel("prior_b", "b_condition_sumAO_Sym", "AO--Sym")
    cond2 <- density_panel("prior_b", "b_condition_sumAsym_Sym", "Asym--Sym")
  }
  ### Round and interaction terms
  if (interaction) {
    round_panel <- density_panel("prior_b", "b_round_c", "Centered round")
    if (!sum_coded) {
      cond1_round <- density_panel("prior_b", "b_conditionAsym_Sym:round_c",
                                   "Centered Round: Asym--Sym")
      cond2_round <- density_panel("prior_b", "b_conditionAO_Asym:round_c",
                                   "Centered Round: AO--Asym")
    } else {
      cond1_round <- density_panel("prior_b", "b_condition_sumAO_Sym:round_c",
                                   "Centered Round: AO--Sym")
      cond2_round <- density_panel("prior_b", "b_condition_sumAsym_Sym:round_c",
                                   "Centered Round: Asym--Sym")
    }
  }
  ### display the plots
  if (interaction) {
    gridExtra::grid.arrange(intercept, cond1, cond2, round_panel,
                            cond1_round, cond2_round,
                            ncol=2)
  } else {
    gridExtra::grid.arrange(intercept, cond1, cond2, ncol=2)
  }
}
Here, we will load the dtw_distance_mirrored.csv dataframe.
### dtw_distance_mirrored.csv
# Columns to convert to factors after reading.
fct_columns = c("pair", "referent",
                "speaker_1", "round_1", "trial_1", "target_1",
                "speaker_2", "round_2", "trial_2", "target_2")
# Load the pairwise DTW distances; the "_2" recording defines the target
# and round of each comparison. Rows with a missing round are dropped.
df_dtw = read_csv("data/dtw_distance_mirrored.csv") %>%
  mutate(pair = as.numeric(pair),
         target = target_2,
         # all_of() makes the external-vector selection explicit; a bare
         # vector inside across() is deprecated in tidyselect.
         across(all_of(fct_columns), as.factor),
         round = round_2) %>%
  select(pair, round, target, comparison_id, average_distance, referent:duration_2) %>%
  filter(!is.na(round))
head(df_dtw)
### condition info
# Pair-level visibility condition; relabel to the names used in plots.
df_condition_info = read.csv("data/condition_info.csv", stringsAsFactors = TRUE) %>%
mutate(pair = factor(pair),
condition = factor(condition,
levels = c("Sym", "Asym", "AO"),
labels = c("SymAV", "AsymAV", "AO")))
head(df_condition_info)
### merge dataframes
# Attach the condition to each DTW comparison and keep the analysis columns.
# condition_sum duplicates condition so the two factors can carry different
# contrast codings (set further below).
df = df_dtw %>%
left_join(df_condition_info, by="pair") %>%
select(comparison_id, pair, condition, round, target, referent, average_distance) %>%
mutate(condition_sum = condition,
round_c = as.integer(round) - mean(as.integer(round))) #centered round
# Summarize the DTW distance for the current grouping of `df`:
# n, mean, sd, standard error, and t-based 95% confidence limits.
# Returns an ungrouped data frame; na.rm = FALSE so missing distances
# propagate to NA summaries rather than being silently dropped.
summarize_data <- function(df){
df %>%
summarize(n = n(),
# dtw distance
mean_dis = mean(average_distance, na.rm = FALSE),
sd_dis = sd(average_distance, na.rm = FALSE),
se_dis = std.error(average_distance, na.rm = FALSE),
# 95% CI bounds from the t distribution with n - 1 degrees of freedom
lci_dis = mean_dis - qt(1 - (0.05 / 2), n - 1) * se_dis,
uci_dis = mean_dis + qt(1 - (0.05 / 2), n - 1) * se_dis) %>%
ungroup()
}
# Descriptive summaries at several levels of aggregation
# (summarize_data ungroups each result).
### create df where each row represents a pair.
df_by_pair = df %>%
group_by(pair, condition) %>%
summarize_data()
### create df where each row represents a condition.
df_by_cond = df %>%
group_by(condition) %>%
summarize_data()
df_by_cond
### create df where each row represents a pair and a round.
df_by_pair_round = df %>%
group_by(pair, condition, round) %>%
summarize_data()
### create df where each row represents a round.
df_by_round = df %>%
group_by(round) %>%
summarize_data()
df_by_round
### create df where each row represents a condition x round.
df_by_cond_round = df %>%
group_by(condition, round) %>%
summarize_data()
df_by_cond_round
# Raincloud plot of DTW distance by condition: half-eye density shifted
# right, raw jittered points shifted left (via after_scale), a boxplot,
# and the condition mean (from df_by_cond) as a white dot.
rcp_distance = df %>%
ggplot(aes(x = condition, y = average_distance,
fill = condition)) +
ggdist::stat_halfeye(adjust = 1, width = 0.3, .width = 0,
point_color = NA, alpha = 0.6, justification = -0.5) +
# after_scale(x - 0.2) shifts the jittered raw points left of the density
geom_jitter(aes(x = stage(start = condition, after_scale = x - 0.2)),
size = 0.2, alpha = 0.3, width = 0.07, height = 0) +
geom_boxplot(width = .2,
outlier.shape = NA,
alpha = 0.7,
color = "black") +
# condition mean as a white-filled point
geom_point(data = df_by_cond,
aes(y = mean_dis),
shape = 21, size = 3, fill = "white") +
labs(x="Visibility",
y="DTW distance") +
scale_y_continuous(limits = c(0, 1)) +
scale_fill_manual(values = c("#ED6B06", "#00786A", "darkgrey")) +
theme_clean(base_size = 15) +
theme(axis.text.x = element_text(colour = "black", size = 14),
axis.text.y = element_text(colour = "black", size = 14),
axis.title = element_text(size = 15, face = 'bold'),
axis.title.x = element_text(vjust = -2),
axis.title.y = element_text(vjust = 2),
legend.position = "none",
strip.text = element_text(size = 15, face = 'bold'),
plot.background = element_blank(),
plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines"))
rcp_distance
# Boxplot of DTW distance by condition with raw jittered points and the
# condition mean (from df_by_cond) as a white dot.
bp_distance = df %>%
  ggplot(aes(x = condition, y = average_distance,
             fill = condition)) +
  # stage(start = condition) without after_scale was a no-op wrapper;
  # the inherited x mapping already places the jittered points correctly.
  geom_jitter(size = 0.15, alpha = 0.15, width = 0.07, height = 0) +
  geom_boxplot(width = .3,
               outlier.shape = NA,
               alpha = 0.7,
               color = "black") +
  geom_point(data = df_by_cond,
             aes(y = mean_dis),
             shape = 21, size = 3, fill = "white") +
  labs(x="Visibility",
       y="DTW distance") +
  scale_y_continuous(limits = c(0, 0.9)) +
  scale_fill_manual(values = c("#ED6B06", "#00786A", "darkgrey")) +
  theme_clean(base_size = 15) +
  theme(axis.text.x = element_text(colour = "black", size = 14),
        axis.text.y = element_text(colour = "black", size = 14),
        axis.title = element_text(size = 15, face = 'bold'),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 2),
        legend.position = "none",
        strip.text = element_text(size = 15, face = 'bold'),
        plot.background = element_blank(),
        plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines"))
bp_distance
# Boxplots of DTW distance by round, faceted by condition, with raw jittered
# points and the condition-by-round mean (from df_by_cond_round) as a white dot.
bp_distance_by_cond_round =
  ggplot(data=df, aes(x=round, y = average_distance, fill = condition)) +
  # stage(start = round) without after_scale was a no-op wrapper;
  # the inherited x mapping already places the jittered points correctly.
  geom_jitter(size = 0.3, alpha = 0.2, width = 0.07, height = 0.02) +
  geom_boxplot(width = .5,
               outlier.shape = NA, alpha = 0.7) +
  geom_point(data = df_by_cond_round,
             aes(y = mean_dis, group = round),
             shape = 21, size = 2, fill = "white") +
  labs(x = "Round",
       y = "DTW distance") +
  scale_fill_manual(values = c("#ED6B06", "#00786A", "darkgrey")) +
  scale_y_continuous(limits = c(0, 0.9)) +
  theme_clean(base_size = 15) +
  theme(axis.text.x = element_text(colour = "black", size = 14),
        axis.text.y = element_text(colour = "black", size = 14),
        axis.title = element_text(size = 15, face = 'bold'),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 2),
        legend.position = "none",
        strip.text = element_text(size = 15, face = 'bold'),
        plot.background = element_blank(),
        plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines")) +
  facet_grid(cols = vars(condition))
# interactive version for inspection
ggplotly(bp_distance_by_cond_round)
We assume the following causal model:
### dag
# Causal diagram: Visibility (X) and Round (Z) are each assumed to affect
# DTW distance (Y); no edge between X and Z.
grViz(
"digraph {
graph [ranksep = 0.3]
node [shape = plaintext]
X [label = Visibility, fontcolor = forestgreen]
Y [label = DTW_distance, fontcolor = darkorange]
Z [label = Round]
edge [minlen = 2.5]
{X -> Y
Z -> Y}
{rank = same; X; Y}
}"
)
Based on causal inference theory, including only visibility as a fixed effect is sufficient to estimate its causal effect on DTW distance. As adding round should not influence the estimates of visibility, we will include round as well.
### visibility condition: difference coding
# Sequential (repeated-difference) contrasts for `condition`:
# AO_Asym tests AsymAV vs AO, Asym_Sym tests SymAV vs AsymAV.
h_cond = hypr(AO_Asym = AsymAV ~ AO,
Asym_Sym = SymAV ~ AsymAV,
levels = levels(df$condition))
h_cond
## hypr object containing 2 null hypotheses:
## H0.AO_Asym: 0 = AsymAV - AO
## H0.Asym_Sym: 0 = SymAV - AsymAV
##
## Call:
## hypr(AO_Asym = ~AsymAV - AO, Asym_Sym = ~SymAV - AsymAV, levels = c("SymAV",
## "AsymAV", "AO"))
##
## Hypothesis matrix (transposed):
## AO_Asym Asym_Sym
## SymAV 0 1
## AsymAV 1 -1
## AO -1 0
##
## Contrast matrix:
## AO_Asym Asym_Sym
## SymAV 1/3 2/3
## AsymAV 1/3 -1/3
## AO -2/3 -1/3
contrasts(df$condition) = contr.hypothesis(h_cond)
### visibility condition: treatment coding
# NOTE(review): both comparisons use SymAV as the baseline, but
# contr.hypothesis centers the contrast columns (they sum to zero), so this
# is not plain treatment coding — confirm the intended label.
h_cond = hypr(AO_Sym = SymAV ~ AO,
Asym_Sym = SymAV ~ AsymAV,
levels = levels(df$condition))
h_cond
## hypr object containing 2 null hypotheses:
## H0.AO_Sym: 0 = SymAV - AO
## H0.Asym_Sym: 0 = SymAV - AsymAV
##
## Call:
## hypr(AO_Sym = ~SymAV - AO, Asym_Sym = ~SymAV - AsymAV, levels = c("SymAV",
## "AsymAV", "AO"))
##
## Hypothesis matrix (transposed):
## AO_Sym Asym_Sym
## SymAV 1 1
## AsymAV 0 -1
## AO -1 0
##
## Contrast matrix:
## AO_Sym Asym_Sym
## SymAV 1/3 1/3
## AsymAV 1/3 -2/3
## AO -2/3 1/3
contrasts(df$condition_sum) = contr.hypothesis(h_cond)
The mediapipe estimates the locations of each keypoint in the video frame with a number between 0 (top, left) and 1 (bottom, right). As we use normalized dtw distance in which the sum of the distance is normalized by the duration of the annotated gestures, we can assume that most of the dtw distances should be between 0 and 1. Therefore, we expect the mean dtw distance to be around 0.5 and the most likely range of the dtw distance to be between 0 and 1.
Given that the dtw distance cannot be negative, it is not optimal to model the dtw distance with a normal distribution (linear regression). Instead, we will use a log-normal distribution to model the dtw distance. The log-normal distribution is a continuous probability distribution of a random variable whose logarithm is normally distributed. The log-normal distribution is a good choice for modeling the dtw distance because it is always positive and has a long tail to the right.
For log-normal regressions, priors need to be specified on the log scale. As such, we will set a prior for the intercept to be Normal(-0.7, 0.5). This means that we expect the most likely distance score to be 0.5 (exp(-0.7)) and that, about 95% of the time, the mean falls between 0.18 (exp(-1.7)) and 1.35 (exp(0.3)). This prior is weakly informative, as it is still informative about the mean while allowing for a wide range of values.
As for the fixed and random effects, we will set unbiased priors of Normal(0, 0.5). This prior is “unbiased” because we are telling the model that most likely values for the slopes are around 0 (i.e., no effect).
We will use LKJ(2) prior for the correlation matrix.
# Priors on the log scale: weakly-informative intercept centered at
# log(0.5) = -0.7; "unbiased" N(0, 0.5) for slopes and variance components.
priors_rint_dtw = c(
prior(normal(-0.7, 0.5), class = Intercept),
prior(normal(0, 0.5), class = b),
prior(normal(0, 0.5), class = sd),
prior(normal(0, 0.5), class = sigma))
# Random-slope version adds an LKJ(2) prior on the correlation matrix.
priors_rslope_dtw = c(
prior(normal(-0.7, 0.5), class = Intercept),
prior(normal(0, 0.5), class = b),
prior(normal(0, 0.5), class = sd),
prior(normal(0, 0.5), class = sigma),
prior(lkj(2), class = cor))
# Prior predictive model: sample_prior = "only" draws predictions from the
# priors alone (the data inform only the design matrix).
mln_dtw_prior = brm(average_distance ~ 1 + condition * round_c +
(1+round_c|pair) + (1|target),
family = lognormal(),
prior = priors_rslope_dtw,
sample_prior = "only",
data = df,
file = "models/mln_dtw_prior")
# prior predictive check against the observed distance distribution
pp_check(mln_dtw_prior, ndraws = 100) +
coord_cartesian(xlim = c(0, 3),
ylim = c(0, 100))
The prior predictive check shows that the model is able to generate data that is consistent with the observed data.
# Main model: lognormal regression of DTW distance on condition x centered
# round, with by-pair random intercepts and round slopes and a by-target
# random intercept. sample_prior = T keeps prior draws for BF computation.
mln_dtw = brm(average_distance ~ 1 + condition * round_c +
(1+round_c|pair) + (1|target),
family = lognormal(),
prior = priors_rslope_dtw,
data = df,
sample_prior = T,
warmup = nwu, iter = niter,
control = list(adapt_delta = 0.9,
max_treedepth = 15),
file = "models/mln_dtw")
model = mln_dtw
summary(model)
## Family: lognormal
## Links: mu = identity; sigma = identity
## Formula: average_distance ~ 1 + condition * round_c + (1 + round_c | pair) + (1 | target)
## Data: df (Number of observations: 1974)
## Draws: 4 chains, each with iter = 20000; warmup = 2000; thin = 1;
## total post-warmup draws = 72000
##
## Multilevel Hyperparameters:
## ~pair (Number of levels: 45)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 0.14 0.02 0.10 0.19 1.00 21893
## sd(round_c) 0.06 0.01 0.03 0.08 1.00 26473
## cor(Intercept,round_c) -0.20 0.23 -0.62 0.28 1.00 27317
## Tail_ESS
## sd(Intercept) 36879
## sd(round_c) 40867
## cor(Intercept,round_c) 41941
##
## ~target (Number of levels: 16)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.05 0.02 0.02 0.09 1.00 19968 21033
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -1.23 0.03 -1.29 -1.17 1.00 21125
## conditionAO_Asym -0.07 0.07 -0.20 0.06 1.00 20690
## conditionAsym_Sym -0.01 0.06 -0.13 0.10 1.00 17898
## round_c 0.02 0.01 -0.01 0.04 1.00 31397
## conditionAO_Asym:round_c -0.04 0.03 -0.10 0.03 1.00 29983
## conditionAsym_Sym:round_c 0.01 0.03 -0.05 0.06 1.00 26412
## Tail_ESS
## Intercept 32379
## conditionAO_Asym 32915
## conditionAsym_Sym 30492
## round_c 41228
## conditionAO_Asym:round_c 40615
## conditionAsym_Sym:round_c 38341
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.41 0.01 0.40 0.43 1.00 74026 52292
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# 95% highest-density intervals for the model parameters
bayestestR::hdi(model)
# bayestestR::hdi(model, ci = 0.89)
The coefficients show that the visibility condition did not have a reliable effect on the dtw distance.
# Posterior coefficient plot and prior-vs-posterior update plots.
plot_posterior(model)
post_sample = as_draws_df(model)
pp_update_plot(post_sample)
# posterior predictive check against the observed distances
pp_check(model, ndraws = 100) +
coord_cartesian(xlim = c(0, 3))
The posterior predictive check shows that the model is able to generate data that is overall consistent with the observed data.
### varying priors for sensitivity analysis
# Refit the difference-coded model with narrower/wider slope priors and
# record BF10 for each fixed effect. The medium prior (sd = 0.5) values from
# the main model are inserted afterwards.
prior_size = c("xs", "s", "l", "xl")
prior_sd = c(0.1, 0.3, 1, 1.5)
bfs_cond_ao_asym = c()
bfs_cond_asym_sym = c()
bfs_round = c()
for (i in seq_along(prior_sd)){
  priors = c(
    prior(normal(-0.7, 0.5), class = Intercept),
    set_prior(paste0("normal(0,", prior_sd[i], ")"), class = "b"),
    prior(normal(0, 0.5), class = sd),
    prior(normal(0, 0.5), class = sigma),
    prior(lkj(2), class = cor))
  fname = paste0("models/mln_dtw_", prior_size[i])
  fit = brm(average_distance ~ 1 + condition * round_c +
              (1+round_c|pair) + (1|target),
            family = lognormal(),
            prior = priors,
            data = df,
            sample_prior = TRUE,
            save_pars = save_pars(all = TRUE),
            warmup = nwu, iter = niter,
            control = list(adapt_delta = 0.9,
                           max_treedepth = 15),
            file = fname)
  ### BF for visibility conditions
  # AO - Asym (tests coefficient conditionAO_Asym)
  h = hypothesis(fit, "conditionAO_Asym = 0")
  bf = 1 / abs(h$hypothesis$Evid.Ratio) # Evid.Ratio is evidence for H0; invert for BF10
  bfs_cond_ao_asym = c(bfs_cond_ao_asym, bf)
  # Asym - Sym (tests coefficient conditionAsym_Sym)
  h = hypothesis(fit, "conditionAsym_Sym = 0")
  bf = 1 / abs(h$hypothesis$Evid.Ratio)
  bfs_cond_asym_sym = c(bfs_cond_asym_sym, bf)
  ### BF for rounds
  # BUG FIX: previously this called hypothesis(model, ...), re-testing the
  # main fit on every iteration, so the round BF never varied with the prior.
  # It must be computed on the refitted `fit`.
  h = hypothesis(fit, "round_c = 0")
  bf = 1 / abs(h$hypothesis$Evid.Ratio)
  bfs_round = c(bfs_round, bf)
}
### add BF for the main/medium model
# Insert the medium prior (sd = 0.5) results from the main model at position
# 3 so the vectors are ordered xs, s, m, l, xl.
prior_size[3:5] = c("m", prior_size[3:4])
prior_sd[3:5] = c(0.5, prior_sd[3:4])
### BF for visibility
# AO - Asym (tests coefficient conditionAO_Asym)
h = hypothesis(model, "conditionAO_Asym = 0")
bf = 1 / abs(h$hypothesis$Evid.Ratio)
bfs_cond_ao_asym[3:5] = c(bf, bfs_cond_ao_asym[3:4])
# Asym - Sym (tests coefficient conditionAsym_Sym)
h = hypothesis(model, "conditionAsym_Sym = 0")
bf = 1 / abs(h$hypothesis$Evid.Ratio)
bfs_cond_asym_sym[3:5] = c(bf, bfs_cond_asym_sym[3:4])
### BF for rounds
h = hypothesis(model, "round_c = 0")
bf = 1 / abs(h$hypothesis$Evid.Ratio)
bfs_round[3:5] = c(bf, bfs_round[3:4])
### make a df for BFs
# Long format: one row per prior size x effect, with a readable prior label.
df_bf = data.frame(size = prior_size,
sd = prior_sd,
ao_asym = bfs_cond_ao_asym,
asym_sym = bfs_cond_asym_sym,
round = bfs_round) %>%
mutate(prior = paste0("N(0, ", sd, ")")) %>%
pivot_longer(cols = c("ao_asym", "asym_sym", "round"),
names_to = "Effect",
values_to = "BF10") %>%
mutate(Predictor = ifelse(grepl("round", Effect), "Round", "Visibility"))
# human-readable effect labels
df_bf$Effect = recode(df_bf$Effect,
ao_asym = "AO--AsymAV",
asym_sym = "AsymAV--SymAV",
round = "Round")
#### Plot BFs ####
# Sensitivity plot: BF10 for each effect as a function of the slope prior.
ggplot(df_bf, aes(x = prior, y = BF10, group = Effect)) +
geom_hline(yintercept = 1, linetype="dashed") + # BF10 = 1: no evidence either way
geom_point(aes(color=Effect)) +
geom_line(aes(color=Effect)) +
theme_bw(base_size = 12)+
theme(legend.position = "top")+
scale_y_log10("Bayes factor (BF10)",
breaks = c(0.001, 0.01, 0.03, 0.1, 0.33, 1, 3, 10, 30, 100, 1e4, 1e5, 1e6, 1e7),
labels = c(0.001, 0.01, 0.03, 0.1, 0.33, 1, 3, 10, 30, 100, 1e4, 1e5, 1e6, 1e7)) +
xlab("prior")
# probability of direction for each parameter
p_direction(model)
# pairwise condition contrasts on the model (log) scale; 95% HPD by default
emmeans(model, pairwise ~ condition)$contrasts
## contrast estimate lower.HPD upper.HPD
## SymAV - AsymAV -0.0131 -0.129 0.1054
## SymAV - AO -0.0826 -0.208 0.0453
## AsymAV - AO -0.0693 -0.199 0.0566
##
## Point estimate displayed: median
## HPD interval probability: 0.95
# same contrasts with 89% HPD intervals
emmeans(model, pairwise ~ condition, level = 0.89)$contrasts
## contrast estimate lower.HPD upper.HPD
## SymAV - AsymAV -0.0131 -0.107 0.0810
## SymAV - AO -0.0826 -0.185 0.0198
## AsymAV - AO -0.0693 -0.169 0.0379
##
## Point estimate displayed: median
## HPD interval probability: 0.89
# Plot all conditional effects without prompting between plots
# (TRUE/FALSE instead of the reassignable T/F shorthand).
plot(conditional_effects(mln_dtw), ask = FALSE)
# Prior predictive model for the sum-coded condition factor.
mln_dtw_sum_prior = brm(average_distance ~ 1 + condition_sum * round_c +
(1+round_c|pair) + (1|target),
family = lognormal(),
prior = priors_rslope_dtw,
sample_prior = "only",
data = df,
file = "models/mln_dtw_sum_prior")
# prior predictive check
pp_check(mln_dtw_sum_prior, ndraws = 100) +
coord_cartesian(xlim = c(0, 3),
ylim = c(0, 10))
The prior predictive check shows that the model is able to generate data that is consistent with the observed data.
# Posterior model with the sum-coded factor (coefficients condition_sumAO_Sym
# and condition_sumAsym_Sym), supplying the AO--Sym contrast.
mln_dtw_sum = brm(average_distance ~ 1 + condition_sum * round_c +
(1+round_c|pair) + (1|target),
family = lognormal(),
prior = priors_rslope_dtw,
data = df,
sample_prior = T,
warmup = nwu, iter = niter,
control = list(adapt_delta = 0.9,
max_treedepth = 15),
file = "models/mln_dtw_sum")
model1 = mln_dtw # difference-coded fit
model = mln_dtw_sum # sum-coded fit
summary(model)
## Family: lognormal
## Links: mu = identity; sigma = identity
## Formula: average_distance ~ 1 + condition_sum * round_c + (1 + round_c | pair) + (1 | target)
## Data: df (Number of observations: 1974)
## Draws: 4 chains, each with iter = 20000; warmup = 2000; thin = 1;
## total post-warmup draws = 72000
##
## Multilevel Hyperparameters:
## ~pair (Number of levels: 45)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## sd(Intercept) 0.14 0.02 0.10 0.19 1.00 26243
## sd(round_c) 0.06 0.01 0.03 0.08 1.00 30510
## cor(Intercept,round_c) -0.20 0.24 -0.62 0.28 1.00 37154
## Tail_ESS
## sd(Intercept) 43399
## sd(round_c) 46781
## cor(Intercept,round_c) 50981
##
## ~target (Number of levels: 16)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.05 0.02 0.02 0.09 1.00 22557 23543
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept -1.23 0.03 -1.29 -1.17 1.00
## condition_sumAO_Sym -0.08 0.06 -0.21 0.05 1.00
## condition_sumAsym_Sym -0.01 0.06 -0.13 0.11 1.00
## round_c 0.02 0.01 -0.01 0.04 1.00
## condition_sumAO_Sym:round_c -0.03 0.03 -0.09 0.03 1.00
## condition_sumAsym_Sym:round_c 0.01 0.03 -0.05 0.06 1.00
## Bulk_ESS Tail_ESS
## Intercept 28733 39899
## condition_sumAO_Sym 27772 37885
## condition_sumAsym_Sym 24609 38168
## round_c 45747 49636
## condition_sumAO_Sym:round_c 45481 50165
## condition_sumAsym_Sym:round_c 37105 45533
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 0.41 0.01 0.40 0.43 1.00 115803 53607
##
## Draws were sampled using sample(hmc). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# 95% highest-density intervals for the sum-coded model
bayestestR::hdi(model)
# bayestestR::hdi(model, ci = 0.89)
The coefficients show that the visibility condition did not have a reliable effect on the dtw distance.
# Combined posterior plot: model1 supplies the difference-coded contrasts,
# model supplies the AO--Sym contrast.
plot_posterior(model1, model)
post_sample = as_draws_df(model)
pp_update_plot(post_sample, interaction = T)
# posterior predictive check
pp_check(model, ndraws = 100) +
coord_cartesian(xlim = c(0, 3))
The posterior predictive check shows that the model is able to generate data that is overall consistent with the observed data.
### varying priors for sensitivity analysis
# Same sensitivity analysis for the sum-coded model: refit with each slope
# prior and record BF10 for the AO--Sym contrast.
prior_size = c("xs", "s", "l", "xl")
prior_sd = c(0.1, 0.3, 1, 1.5)
bfs_cond_ao_sym = c()
for (i in seq_along(prior_sd)){
  priors = c(
    prior(normal(-0.7, 0.5), class = Intercept),
    set_prior(paste0("normal(0,", prior_sd[i], ")"), class = "b"),
    prior(normal(0, 0.5), class = sd),
    prior(normal(0, 0.5), class = sigma),
    prior(lkj(2), class = cor))
  fname = paste0("models/mln_dtw_sum_", prior_size[i])
  fit = brm(average_distance ~ 1 + condition_sum * round_c +
              (1+round_c|pair) + (1|target),
            family = lognormal(),
            prior = priors,
            data = df,
            sample_prior = TRUE,
            save_pars = save_pars(all = TRUE),
            warmup = nwu, iter = niter,
            control = list(adapt_delta = 0.9,
                           max_treedepth = 15),
            file = fname)
  # BF for AO - Sym (tests coefficient condition_sumAO_Sym)
  h = hypothesis(fit, "condition_sumAO_Sym = 0")
  bf = 1 / abs(h$hypothesis$Evid.Ratio) # Evid.Ratio is evidence for H0; invert for BF10
  bfs_cond_ao_sym = c(bfs_cond_ao_sym, bf)
}
### add BF for the main/medium model
# Insert the medium prior (sd = 0.5) result at position 3 so the vectors are
# ordered xs, s, m, l, xl.
prior_size[3:5] = c("m", prior_size[3:4])
prior_sd[3:5] = c(0.5, prior_sd[3:4])
# BF for AO - Sym (tests coefficient condition_sumAO_Sym)
h = hypothesis(model, "condition_sumAO_Sym = 0")
bf = 1 / abs(h$hypothesis$Evid.Ratio)
bfs_cond_ao_sym[3:5] = c(bf, bfs_cond_ao_sym[3:4])
### make a df for BFs
df_bf_temp = data.frame(size = prior_size,
sd = prior_sd,
Effect = "AO--SymAV",
BF10 = bfs_cond_ao_sym) %>%
mutate(prior = paste0("N(0, ", sd, ")"),
Predictor = "Visibility")
# Combine with the BFs from the difference-coded model and fix facet order.
df_bf_new = df_bf %>%
rbind(df_bf_temp) %>%
mutate(Effect = factor(Effect,
levels = c("AO--SymAV", "AO--AsymAV", "AsymAV--SymAV", "Round")),
Predictor = factor(Predictor,
levels = c("Visibility", "Round")))
df_bf_new %>% arrange(Effect, sd)
#### Plot BFs ####
# Sensitivity plot across both models: BF10 by prior SD, faceted by predictor.
ggplot(df_bf_new,
       aes(x = factor(sd), y = BF10, group = Effect)) +
  geom_hline(yintercept = 1, linetype="dashed") + # BF10 = 1: no evidence either way
  geom_point(aes(color=Effect)) +
  geom_line(aes(color=Effect)) +
  facet_wrap(vars(Predictor)) +
  theme_clean(base_size = 15) +
  theme(axis.text.x = element_text(colour = "black", size = 14),
        axis.text.y = element_text(colour = "black", size = 14),
        axis.title = element_text(size = 15, face = 'bold'),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 2),
        legend.position = "right",
        strip.text = element_text(size = 15, face = 'bold'),
        plot.background = element_blank(),
        plot.margin = unit(c(1.1,1.1,1.1,1.1), "lines")) +
  # breaks/labels in ascending order (0.01 and 0.03 were swapped),
  # consistent with the earlier BF plot
  scale_y_log10("Bayes factor (BF10)",
                breaks = c(0.001, 0.01, 0.03, 0.1, 0.33, 1, 3, 10, 30, 100),
                labels = c(0.001, 0.01, 0.03, 0.1, 0.33, 1, 3, 10, 30, 100)) +
  xlab("SD for the prior")
# probability of direction for the sum-coded model's parameters
p_direction(model)
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22631)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] rstan_2.32.6 StanHeaders_2.32.7 doParallel_1.0.17 iterators_1.0.14
## [5] foreach_1.5.2 emmeans_1.10.7 tidybayes_3.0.7 bayestestR_0.15.2
## [9] brms_2.22.0 Rcpp_1.0.12 ggExtra_0.10.1 plotly_4.10.4
## [13] DiagrammeR_1.0.11 ggthemes_5.1.0 ggh4x_0.3.1 hypr_0.2.8
## [17] plotrix_3.8-4 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [21] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [25] tibble_3.2.1 ggplot2_3.5.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.1-0 estimability_1.5.1 QuickJSR_1.1.3
## [4] rstudioapi_0.17.1 farver_2.1.1 svUnit_1.0.6
## [7] bit64_4.0.5 mvtnorm_1.2-4 bridgesampling_1.1-2
## [10] codetools_0.2-18 cachem_1.0.8 knitr_1.49
## [13] bayesplot_1.11.1 jsonlite_1.8.8 ggdist_3.3.2
## [16] shiny_1.11.1 compiler_4.2.2 httr_1.4.7
## [19] backports_1.4.1 Matrix_1.5-1 fastmap_1.1.1
## [22] lazyeval_0.2.2 cli_3.6.2 later_1.3.2
## [25] visNetwork_2.1.2 htmltools_0.5.8.1 tools_4.2.2
## [28] coda_0.19-4.1 gtable_0.3.6 glue_1.7.0
## [31] reshape2_1.4.4 posterior_1.6.1 V8_6.0.6
## [34] jquerylib_0.1.4 vctrs_0.6.5 nlme_3.1-160
## [37] crosstalk_1.2.1 insight_1.1.0 tensorA_0.36.2.1
## [40] xfun_0.53 ps_1.7.6 timechange_0.3.0
## [43] mime_0.12 miniUI_0.1.1.1 lifecycle_1.0.4
## [46] MASS_7.3-58.1 scales_1.3.0 vroom_1.6.5
## [49] ragg_1.3.0 hms_1.1.3 promises_1.3.3
## [52] Brobdingnag_1.2-9 inline_0.3.21 RColorBrewer_1.1-3
## [55] curl_6.2.1 yaml_2.3.8 gridExtra_2.3
## [58] loo_2.8.0 sass_0.4.9 stringi_1.8.3
## [61] checkmate_2.3.1 pkgbuild_1.4.6 cmdstanr_0.8.1
## [64] rlang_1.1.3 pkgconfig_2.0.3 systemfonts_1.0.6
## [67] matrixStats_1.3.0 distributional_0.5.0 pracma_2.4.4
## [70] evaluate_1.0.3 lattice_0.20-45 rstantools_2.4.0
## [73] htmlwidgets_1.6.4 labeling_0.4.3 processx_3.8.4
## [76] bit_4.0.5 tidyselect_1.2.1 plyr_1.8.9
## [79] magrittr_2.0.3 R6_2.6.1 generics_0.1.3
## [82] pillar_1.10.1 withr_3.0.2 datawizard_1.0.1
## [85] abind_1.4-8 crayon_1.5.3 arrayhelpers_1.1-0
## [88] tzdb_0.4.0 rmarkdown_2.29 grid_4.2.2
## [91] data.table_1.15.4 digest_0.6.35 xtable_1.8-4
## [94] httpuv_1.6.15 textshaping_0.3.7 stats4_4.2.2
## [97] RcppParallel_5.1.7 munsell_0.5.1 viridisLite_0.4.2
## [100] bslib_0.9.0